## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:arules':
## 
##     intersect, recode, setdiff, setequal, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:DescTools':
## 
##     Recode
## The following object is masked from 'package:arules':
## 
##     recode
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:arules':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:DescTools':
## 
##     BoxCox
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following objects are masked from 'package:DescTools':
## 
##     MAE, RMSE
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
## 
##     gas
## The following object is masked from 'package:DescTools':
## 
##     %^%

Part 1: First look

cancer_incidence<- read.csv(file="C:/Users/olivi/OneDrive/Documents/DataSets/13100109-eng/13100109.csv", sep = ",")

From the cancer_incidence file arbitrary features are removed first then rows with about characteristics that aren’t what is being analyzed.

Basic preliminary before moving to pandas-profiling

cancer_2<-cancer_table[-grep("Statistically different from", cancer_table$Characteristics),]
cancer_2<-cancer_2[-grep("95%", cancer_2$Characteristics),]
cancer_2<-cancer_2[grep("age-standardized", cancer_2$Characteristics),]
cancer_2<-cancer_2[-grep("Canada", cancer_2$GEO),]
cancer_2<-cancer_2[-grep("Peer", cancer_2$GEO),]
cancer_2<- cancer_2[!duplicated(cancer_2),]


cancer_2.1<- spread(cancer_2, Characteristics, VALUE)

cancer_2.2<- cancer_2.1[,1:5]
cancer_2.2<- spread(cancer_2.2, Selected.sites.of.cancer..ICD.O.3., 'Cancer incidence (age-standardized rate per 100,000 population)')


cancer_2.2<-as.data.frame(unclass(cancer_2.2), stringsAsFactors = TRUE)
#levels(cancer_2.2$GEO)

Part 2: preliminary analysis

hist(cancer_2.2[,4], breaks = 50, main = "All cancer sites")

hist(cancer_2.2[,5], breaks = 50, main = "Bronchus and lung cancer")

hist(cancer_2.2[,6], breaks = 50, main = "Colon cancer")

hist(cancer_2.2[,7], breaks = 50, main = "Female breast cancer")

hist(cancer_2.2[,8], breaks = 25, main = "Prostate cancer")

Part 3: Stratify data

##continuous
cancer_ON.2<- cancer_2.2[grep("Ontario", cancer_2.2$GEO),]
cancer_QC.2<- cancer_2.2[grep("Quebec", cancer_2.2$GEO),]
cancer_BC.2<- cancer_2.2[grep("British Columbia", cancer_2.2$GEO),]

cancer_MB.2<- cancer_2.2[grep("Manitoba", cancer_2.2$GEO),]
cancer_SK.2<- cancer_2.2[grep("Saskatchewan", cancer_2.2$GEO),]
cancer_AB.2<- cancer_2.2[grep("Alberta", cancer_2.2$GEO),]
cancer_midwest.2<- rbind(cancer_MB.2,cancer_AB.2,cancer_SK.2)

cancer_NL.2<- cancer_2.2[grep("Newfoundland", cancer_2.2$GEO),]
cancer_NB.2<- cancer_2.2[grep("New Brunswick", cancer_2.2$GEO),]
cancer_NS.2<- cancer_2.2[grep("Nova Scotia", cancer_2.2$GEO),]
cancer_PEI.2<- cancer_2.2[grep("Prince Edward Island",cancer_2.2$GEO),]
cancer_maritimes.2<- rbind(cancer_NS.2,cancer_NB.2,cancer_NL.2,cancer_PEI.2)

cancer_YT.2<- cancer_2.2[grep("Yukon", cancer_2.2$GEO),]
cancer_NWT.2<-cancer_2.2[grep("Northwest Territories", cancer_2.2$GEO),]
cancer_NVT.2<-cancer_2.2[grep("Nunavut", cancer_2.2$GEO),]
cancer_territories.2<- rbind(cancer_NWT.2,cancer_YT.2,cancer_NVT.2)

Part 4: Preliminary time-series analysis

##try time series analysis
###Just a peek

ts_ON.2<-ts(data = cancer_ON.2, frequency = n_distinct(cancer_ON.2$GEO) ,start = 2001, end = 2015)
ts.plot(ts_ON.2)

ts_QC.2<-ts(data = cancer_QC.2, frequency = n_distinct(cancer_QC.2$GEO) ,start = 2001, end = 2015)
ts.plot(ts_QC.2)

ts_BC.2<-ts(data = cancer_BC.2, frequency = n_distinct(cancer_BC.2$GEO) ,start = 2001, end = 2015)
ts.plot(ts_BC.2)

ts_midwest.2<-ts(data = cancer_midwest.2, frequency = n_distinct(cancer_midwest.2$GEO) ,start = 2001, end = 2015)
ts.plot(ts_midwest.2)

ts_maritimes.2<-ts(data = cancer_maritimes.2,frequency = n_distinct(cancer_maritimes.2$GEO) , start = 2001, end = 2015)
ts.plot(ts_maritimes.2)

ts_territories.2<-ts(data = cancer_territories.2, frequency = n_distinct(cancer_territories.2$GEO) ,start = 2001, end = 2015)
ts.plot(ts_territories.2)

####SO the frequency used is the number of distinct values in the GEO feature. 
####This is because this is the "interval" that repeats over time.
##"Argument frequency indicates the sampling frequency of the time series, with the default value 1 indicating one sample in each unit time interval."

Note that decomposition plots are in order ON, QC, BC, midwest, maritimes, territories and each group has a decomp plot for all cancer site, lung cancer and colon cancer, respectively.

plot(decompose(ts_ON.2[,4]))

plot(decompose(ts_ON.2[,5]))

plot(decompose(ts_ON.2[,6]))

plot(decompose(ts_QC.2[,4]))

plot(decompose(ts_QC.2[,5]))#$trend)

plot(decompose(ts_QC.2[,6]))

plot(decompose(ts_BC.2[,4]))

plot(decompose(ts_BC.2[,5]))

plot(decompose(ts_BC.2[,6]))

plot(decompose(ts_midwest.2[,4]))##residuals are not white noise

plot(decompose(ts_midwest.2[,5]))##residuals are not white noise

plot(decompose(ts_midwest.2[,6]))##residuals are not white noise

plot(decompose(ts_maritimes.2[,4]))

plot(decompose(ts_maritimes.2[,5]))##residuals are not white noise

plot(decompose(ts_maritimes.2[,6]))

plot(decompose(ts_territories.2[,4]))

plot(decompose(ts_territories.2[,5]))

plot(decompose(ts_territories.2[,6]))

ggplot(ts_ON.2[,4], aes(ts_ON.2[,1], ts_ON.2[,4]))+
   geom_smooth(method="loess")+
  xlab("Time Periods")+
  ylab("Cancer Incidence")+
  ggtitle("ON Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_ON.2[,5], aes(ts_ON.2[,1], ts_ON.2[,5]))+
  geom_smooth(method="loess")+
  xlab("Time Periods")+
  ylab("Cancer Incidence")+
  ggtitle("ON Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_ON.2[,6], aes(ts_ON.2[,1], ts_ON.2[,6]))+
  geom_smooth(method="loess")+
  xlab("Time Periods")+
  ylab("Cancer Incidence")+
  ggtitle("ON Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_QC.2[,4], aes(ts_QC.2[,1], ts_QC.2[,4]))+
   geom_smooth(method="loess")+
  xlab("Time Periods")+
  ylab("Cancer Incidence")+
  ggtitle("QC Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_QC.2[,5], aes(ts_QC.2[,1], ts_QC.2[,5]))+
  geom_smooth(method="loess")+
  xlab("Time Periods")+
  ylab("Cancer Incidence")+
  ggtitle("QC Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_QC.2[,6], aes(ts_QC.2[,1], ts_QC.2[,6]))+
  geom_smooth(method="loess")+
  xlab("Time Periods")+
  ylab("Cancer Incidence")+
  ggtitle("QC Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_BC.2[,4], aes(ts_BC.2[,1], ts_BC.2[,4]))+
   geom_smooth(method="loess")+
  xlab("Time Periods")+
  ylab("Cancer Incidence")+
  ggtitle("BC Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_BC.2[,5], aes(ts_BC.2[,1], ts_BC.2[,5]))+
  geom_smooth(method="loess")+
  xlab("Time Periods")+
  ylab("Cancer Incidence")+
  ggtitle("BC Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_BC.2[,6], aes(ts_BC.2[,1], ts_BC.2[,6]))+
  geom_smooth(method="loess")+
  xlab("Time Periods")+
  ylab("Cancer Incidence")+
  ggtitle("BC Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_midwest.2[,4], aes(ts_midwest.2[,1], ts_midwest.2[,4]))+
   geom_smooth(method="loess")+
  xlab("2000 - 2015")+
  ylab("Cancer Incidence")+
  ggtitle("Midwest Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_midwest.2[,5], aes(ts_midwest.2[,1], ts_midwest.2[,5]))+
  geom_smooth(method="loess")+
  xlab("2000 - 2015")+
  ylab("Cancer Incidence")+
  ggtitle("Midwest Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_midwest.2[,6], aes(ts_midwest.2[,1], ts_midwest.2[,6]))+
  geom_smooth(method="loess")+
  xlab("2000 - 2015")+
  ylab("Cancer Incidence")+
  ggtitle("Midwest Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_maritimes.2[,4], aes(ts_maritimes.2[,1], ts_maritimes.2[,4]))+
   geom_smooth(method="loess")+
  xlab("2000 - 2015")+
  ylab("Cancer Incidence")+
  ggtitle("Maritimes Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_maritimes.2[,5], aes(ts_maritimes.2[,1], ts_maritimes.2[,5]))+
  geom_smooth(method="loess")+
  xlab("2000 - 2015")+
  ylab("Cancer Incidence")+
  ggtitle("Maritimes Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_maritimes.2[,6], aes(ts_maritimes.2[,1], ts_maritimes.2[,6]))+
  geom_smooth(method="loess")+
  xlab("2000 - 2015")+
  ylab("Cancer Incidence")+
  ggtitle("Maritimes Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_territories.2[,4], aes(ts_territories.2[,1], ts_territories.2[,4]))+
   geom_smooth(method="loess")+
  xlab("2000 - 2015")+
  ylab("Cancer Incidence")+
  ggtitle("Territories Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_territories.2[,5], aes(ts_territories.2[,1], ts_territories.2[,5]))+
  geom_smooth(method="loess")+
  xlab("2000 - 2015")+
  ylab("Cancer Incidence")+
  ggtitle("Territories Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

ggplot(ts_territories.2[,6], aes(ts_territories.2[,1], ts_territories.2[,6]))+
  geom_smooth(method="loess")+
  xlab("2000 - 2015")+
  ylab("Cancer Incidence")+
  ggtitle("Territories Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

Part 4B: Differencing

plot(ts_ON.dif.4<-diff(ts_ON.2[,4]))

plot(ts_ON.dif.5<-diff(ts_ON.2[,5]))

plot(ts_ON.dif.6<-diff(ts_ON.2[,6]))

plot(ts_QC.dif.4<-diff(ts_QC.2[,4]))

plot(ts_QC.dif.5<-diff(ts_QC.2[,5]))

plot(ts_QC.dif.6<-diff(ts_QC.2[,6]))

plot(ts_BC.dif.4<-diff(ts_BC.2[,4]))

plot(ts_BC.dif.5<-diff(ts_BC.2[,5]))

plot(ts_BC.dif.6<-diff(ts_BC.2[,6]))

plot(ts_mid.dif.4<-diff(ts_midwest.2[,4]))

plot(ts_mid.dif.5<-diff(ts_midwest.2[,5]))

plot(ts_mid.dif.6<-diff(ts_midwest.2[,6]))

plot(ts_mar.dif.4<-diff(ts_maritimes.2[,4]))

plot(ts_mar.dif.5<-diff(ts_maritimes.2[,5]))

plot(ts_mar.dif.6<-diff(ts_maritimes.2[,6]))

plot(ts_ter.dif.4<-diff(ts_territories.2[,4]))

plot(ts_ter.dif.5<-diff(ts_territories.2[,5]))

plot(ts_ter.dif.6<-diff(ts_territories.2[,6]))

Part 5: Stationary vs Non-stationary

##tests for stationarity
#ljung-box - non-stationary will have low p-value
##other tests indicated a lag of 6 so we will see how this changes the ljung-box test
Box.test(ts_ON.2[,4], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_ON.2[, 4]
## X-squared = 41.804, df = 1, p-value = 1.009e-10
Box.test(ts_ON.2[,5], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_ON.2[, 5]
## X-squared = 20.283, df = 1, p-value = 6.678e-06
Box.test(ts_ON.2[,6], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_ON.2[, 6]
## X-squared = 11.866, df = 1, p-value = 0.0005718
Box.test(ts_QC.2[,4], type="Ljung-Box")##fail to reject null of stationarity
## 
##  Box-Ljung test
## 
## data:  ts_QC.2[, 4]
## X-squared = 3.5323, df = 1, p-value = 0.06018
Box.test(ts_QC.2[,5], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_QC.2[, 5]
## X-squared = 31.127, df = 1, p-value = 2.417e-08
Box.test(ts_QC.2[,6], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_QC.2[, 6]
## X-squared = 31.829, df = 1, p-value = 1.684e-08
Box.test(ts_BC.2[,4], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_BC.2[, 4]
## X-squared = 14.29, df = 1, p-value = 0.0001567
Box.test(ts_BC.2[,5], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_BC.2[, 5]
## X-squared = 6.0415, df = 1, p-value = 0.01397
Box.test(ts_BC.2[,6], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_BC.2[, 6]
## X-squared = 7.7952, df = 1, p-value = 0.005239
Box.test(ts_midwest.2[,4], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_midwest.2[, 4]
## X-squared = 32.01, df = 1, p-value = 1.534e-08
Box.test(ts_midwest.2[,5], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_midwest.2[, 5]
## X-squared = 13.422, df = 1, p-value = 0.0002486
Box.test(ts_midwest.2[,6], type="Ljung-Box")##fail to reject null of stationarity
## 
##  Box-Ljung test
## 
## data:  ts_midwest.2[, 6]
## X-squared = 2.085, df = 1, p-value = 0.1487
Box.test(ts_maritimes.2[,4], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_maritimes.2[, 4]
## X-squared = 34.233, df = 1, p-value = 4.889e-09
Box.test(ts_maritimes.2[,5], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_maritimes.2[, 5]
## X-squared = 41.948, df = 1, p-value = 9.374e-11
Box.test(ts_maritimes.2[,6], type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ts_maritimes.2[, 6]
## X-squared = 8.2406, df = 1, p-value = 0.004096
Box.test(ts_territories.2[,4], type="Ljung-Box")##fail to reject null of stationarity
## 
##  Box-Ljung test
## 
## data:  ts_territories.2[, 4]
## X-squared = 0.19385, df = 1, p-value = 0.6597
Box.test(ts_territories.2[,5], type="Ljung-Box")##fail to reject null of stationarity
## 
##  Box-Ljung test
## 
## data:  ts_territories.2[, 5]
## X-squared = 1.9755, df = 1, p-value = 0.1599
Box.test(ts_territories.2[,6], type="Ljung-Box")##fail to reject null of stationarity
## 
##  Box-Ljung test
## 
## data:  ts_territories.2[, 6]
## X-squared = 8.0948e-06, df = 1, p-value = 0.9977
#generally reject the null that these are stationary time series

#stationary might indicate that there is an equally likely 
#chance of cancer diagnosis regardless of location
#Augmented dickey-fuller t-stat test for unit root
options(warn = -1)
#non-stationary has a large p-value
adf.test(ts_ON.2[,4])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_ON.2[, 4]
## Dickey-Fuller = -8.561, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_ON.2[,5])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_ON.2[, 5]
## Dickey-Fuller = -7.9376, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_ON.2[,6])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_ON.2[, 6]
## Dickey-Fuller = -7.7782, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_QC.2[,4])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_QC.2[, 4]
## Dickey-Fuller = -6.9374, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_QC.2[,5])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_QC.2[, 5]
## Dickey-Fuller = -7.5649, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_QC.2[,6])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_QC.2[, 6]
## Dickey-Fuller = -4.9935, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_BC.2[,4])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_BC.2[, 4]
## Dickey-Fuller = -5.5942, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_BC.2[,5])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_BC.2[, 5]
## Dickey-Fuller = -5.1055, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_BC.2[,6])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_BC.2[, 6]
## Dickey-Fuller = -5.318, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_midwest.2[,4])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_midwest.2[, 4]
## Dickey-Fuller = -7.0176, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_midwest.2[,5])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_midwest.2[, 5]
## Dickey-Fuller = -7.0317, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_midwest.2[,6])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_midwest.2[, 6]
## Dickey-Fuller = -6.2022, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_maritimes.2[,4])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_maritimes.2[, 4]
## Dickey-Fuller = -5.9679, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_maritimes.2[,5])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_maritimes.2[, 5]
## Dickey-Fuller = -6.2437, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_maritimes.2[,6])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_maritimes.2[, 6]
## Dickey-Fuller = -6.7881, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_territories.2[,4])##fail to reject null of non-stationarity
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_territories.2[, 4]
## Dickey-Fuller = -2.4322, Lag order = 3, p-value = 0.403
## alternative hypothesis: stationary
adf.test(ts_territories.2[,5])##fail to reject null of non-stationarity
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_territories.2[, 5]
## Dickey-Fuller = -2.6982, Lag order = 3, p-value = 0.2979
## alternative hypothesis: stationary
adf.test(ts_territories.2[,6])##fail to reject null of non-stationarity
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_territories.2[, 6]
## Dickey-Fuller = -2.1182, Lag order = 3, p-value = 0.527
## alternative hypothesis: stationary
##reject the null of non-stationarity for all except ^^
##KPSS for level or trend stationarity
options(warn = -1)
## lower p-value indicates non stationarity
kpss.test(ts_ON.2[,4], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_ON.2[, 4]
## KPSS Trend = 0.03125, Truncation lag parameter = 6, p-value = 0.1
kpss.test(ts_ON.2[,5], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_ON.2[, 5]
## KPSS Trend = 0.02115, Truncation lag parameter = 6, p-value = 0.1
kpss.test(ts_ON.2[,6], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_ON.2[, 6]
## KPSS Trend = 0.045779, Truncation lag parameter = 6, p-value = 0.1
kpss.test(ts_QC.2[,4], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_QC.2[, 4]
## KPSS Trend = 0.027699, Truncation lag parameter = 5, p-value = 0.1
kpss.test(ts_QC.2[,5], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_QC.2[, 5]
## KPSS Trend = 0.030488, Truncation lag parameter = 5, p-value = 0.1
kpss.test(ts_QC.2[,6], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_QC.2[, 6]
## KPSS Trend = 0.043577, Truncation lag parameter = 5, p-value = 0.1
kpss.test(ts_BC.2[,4], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_BC.2[, 4]
## KPSS Trend = 0.094938, Truncation lag parameter = 4, p-value = 0.1
kpss.test(ts_BC.2[,5], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_BC.2[, 5]
## KPSS Trend = 0.033942, Truncation lag parameter = 4, p-value = 0.1
kpss.test(ts_BC.2[,6], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_BC.2[, 6]
## KPSS Trend = 0.04278, Truncation lag parameter = 4, p-value = 0.1
kpss.test(ts_midwest.2[,4], null = "Trend")##reject the null that this is level stationary
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_midwest.2[, 4]
## KPSS Trend = 0.096673, Truncation lag parameter = 5, p-value = 0.1
kpss.test(ts_midwest.2[,5], null = "Trend")##reject the null that this is level stationary
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_midwest.2[, 5]
## KPSS Trend = 0.048228, Truncation lag parameter = 5, p-value = 0.1
kpss.test(ts_midwest.2[,6], null = "Trend")##reject the null that this is level/trend stationary
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_midwest.2[, 6]
## KPSS Trend = 0.3926, Truncation lag parameter = 5, p-value = 0.01
kpss.test(ts_maritimes.2[,4], null = "Trend")##reject the null that this is level/trend stationary
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_maritimes.2[, 4]
## KPSS Trend = 0.16538, Truncation lag parameter = 5, p-value = 0.03385
kpss.test(ts_maritimes.2[,5], null = "Trend")##reject the null that this is trend stationary
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_maritimes.2[, 5]
## KPSS Trend = 0.3217, Truncation lag parameter = 5, p-value = 0.01
kpss.test(ts_maritimes.2[,6], null = "Trend")##reject the null that this is level/trend stationary
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_maritimes.2[, 6]
## KPSS Trend = 0.1833, Truncation lag parameter = 5, p-value = 0.02226
kpss.test(ts_territories.2[,4], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_territories.2[, 4]
## KPSS Trend = 0.097352, Truncation lag parameter = 3, p-value = 0.1
kpss.test(ts_territories.2[,5], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_territories.2[, 5]
## KPSS Trend = 0.075875, Truncation lag parameter = 3, p-value = 0.1
kpss.test(ts_territories.2[,6], null = "Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts_territories.2[, 6]
## KPSS Trend = 0.065017, Truncation lag parameter = 3, p-value = 0.1
##cannot reject the null of stationarity, p-value indicates they could be stationary 

Part 6: test/train split

split.ON<-ts_split(ts.obj=ts_ON.2, sample.out =104)
train.ON<- split.ON$train
test.ON<- split.ON$test

split.QC<- ts_split(ts_QC.2, sample.out = 38)
train.QC<- split.QC$train
test.QC<- split.QC$test

split.BC<- ts_split(ts_BC.2, sample.out = 34)
train.BC<- split.BC$train
test.BC<- split.BC$test

split.mid<- ts_split(ts_midwest.2, sample.out =48)
train.mid<- split.mid$train
test.mid<- split.mid$test

split.mar<- ts_split(ts_maritimes.2, sample.out = 38)
train.mar<- split.mar$train
test.mar<- split.mar$test

split.terr<- ts_split(ts_territories.2, sample.out = 6)
train.terr<-split.terr$train
test.terr<-split.terr$test

Part 7: TSLM

#TSLM
#need to train models
##time series regression [tsr]-> tslm

tsr_ON<- tslm(formula= train.ON[,4]~trend+season,data=train.ON)
summary(tsr_ON)
## 
## Call:
## tslm(formula = train.ON[, 4] ~ trend + season, data = train.ON)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -164.354  -55.871   -3.862   57.829  181.937 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 531.961026  20.159882  26.387   <2e-16 ***
## trend         0.020031   0.015681   1.277   0.2020    
## season2     -32.846658  28.225422  -1.164   0.2450    
## season3     -28.383355  28.225208  -1.006   0.3150    
## season4     -42.603387  28.225004  -1.509   0.1317    
## season5     -13.215084  28.224807  -0.468   0.6398    
## season6     -23.060116  28.224620  -0.817   0.4143    
## season7     -30.746813  28.224442  -1.089   0.2765    
## season8     -30.383511  28.224272  -1.077   0.2822    
## season9     -14.395209  28.224110  -0.510   0.6102    
## season10    -47.040240  28.223958  -1.667   0.0961 .  
## season11    -30.451938  28.223814  -1.079   0.2811    
## season12    -55.113636  28.223679  -1.953   0.0513 .  
## season13     -0.775333  28.223553  -0.027   0.9781    
## season14    -21.878698  28.223435  -0.775   0.4385    
## season15      0.601271  28.223326   0.021   0.9830    
## season16     -1.993760  28.223226  -0.071   0.9437    
## season17      6.127875  28.223135   0.217   0.8282    
## season18     11.866178  28.223052   0.420   0.6743    
## season19    -13.712187  28.222978  -0.486   0.6273    
## season20     -6.515551  28.222912  -0.231   0.8175    
## season21     -4.735582  28.222856  -0.168   0.8668    
## season22    -23.030614  28.222808  -0.816   0.4148    
## season23      1.674355  28.222769   0.059   0.9527    
## season24    -23.137343  28.222738  -0.820   0.4127    
## season25    -15.915707  28.222716  -0.564   0.5730    
## season26    -10.969071  28.222703  -0.389   0.6977    
## season27    -23.680769  28.222699  -0.839   0.4018    
## season28      0.949200  28.222703   0.034   0.9732    
## season29      3.579169  28.222716   0.127   0.8991    
## season30      0.009137  28.222738   0.000   0.9997    
## season31     31.447440  28.222769   1.114   0.2656    
## season32     32.427408  28.222808   1.149   0.2510    
## season33     12.282377  28.222856   0.435   0.6636    
## season34     35.812346  28.222912   1.269   0.2050    
## season35    -17.307685  28.222978  -0.613   0.5400    
## season36    -10.602716  28.223052  -0.376   0.7073    
## season37    -24.697747  28.223135  -0.875   0.3819    
## season38     -9.151112  28.223226  -0.324   0.7459    
## season39    -17.687809  28.223326  -0.627   0.5311    
## season40    -13.199507  28.223435  -0.468   0.6402    
## season41    -14.361205  28.223553  -0.509   0.6111    
## season42    -27.864570  28.223679  -0.987   0.3239    
## season43    -30.684601  28.223814  -1.087   0.2774    
## season44    -59.271298  28.223958  -2.100   0.0362 *  
## season45    -12.157996  28.224110  -0.431   0.6668    
## season46    -24.628027  28.224272  -0.873   0.3833    
## season47    -18.639725  28.224442  -0.660   0.5093    
## season48    -16.484756  28.224620  -0.584   0.5594    
## season49     -8.354787  28.224807  -0.296   0.7673    
## season50    -36.183152  28.225004  -1.282   0.2004    
## season51    -16.544850  28.225208  -0.586   0.5580    
## season52    -32.689881  28.225422  -1.158   0.2473    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 70.5 on 572 degrees of freedom
## Multiple R-squared:  0.07845,    Adjusted R-squared:  -0.005324 
## F-statistic: 0.9365 on 52 and 572 DF,  p-value: 0.6026
tsr_ON.5<- tslm(formula= train.ON[,5]~trend +season ,data=train.ON)
summary(tsr_ON.5)
## 
## Call:
## tslm(formula = train.ON[, 5] ~ trend + season, data = train.ON)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.887 -11.034  -0.478   9.948  89.766 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  67.171548   4.788426  14.028  < 2e-16 ***
## trend         0.002057   0.003725   0.552 0.580985    
## season2      -6.305627   6.704174  -0.941 0.347331    
## season3      -4.791017   6.704123  -0.715 0.475125    
## season4      -6.134741   6.704074  -0.915 0.360537    
## season5      -2.378464   6.704028  -0.355 0.722884    
## season6       1.419479   6.703983   0.212 0.832388    
## season7      -1.857578   6.703941  -0.277 0.781813    
## season8       6.315365   6.703900   0.942 0.346568    
## season9      -2.020026   6.703862  -0.301 0.763278    
## season10     -7.230416   6.703826  -1.079 0.281243    
## season11     -0.407473   6.703792  -0.061 0.951554    
## season12      0.657137   6.703760   0.098 0.921947    
## season13     16.138413   6.703730   2.407 0.016383 *  
## season14      9.311356   6.703702   1.389 0.165377    
## season15     13.659299   6.703676   2.038 0.042051 *  
## season16      8.873909   6.703652   1.324 0.186117    
## season17     10.188519   6.703630   1.520 0.129101    
## season18     11.419795   6.703611   1.704 0.089012 .  
## season19      6.126071   6.703593   0.914 0.361181    
## season20     12.074014   6.703578   1.801 0.072209 .  
## season21      4.971957   6.703564   0.742 0.458580    
## season22      5.694900   6.703553   0.850 0.395939    
## season23      0.417843   6.703544   0.062 0.950320    
## season24     -3.200880   6.703536  -0.477 0.633195    
## season25     -7.452937   6.703531  -1.112 0.266694    
## season26     -2.229994   6.703528  -0.333 0.739513    
## season27      3.959615   6.703527   0.591 0.554971    
## season28     13.899225   6.703528   2.073 0.038580 *  
## season29     11.105501   6.703531   1.657 0.098136 .  
## season30     16.978444   6.703536   2.533 0.011583 *  
## season31     13.226387   6.703544   1.973 0.048972 *  
## season32     24.765997   6.703553   3.694 0.000242 ***
## season33     20.555607   6.703564   3.066 0.002269 ** 
## season34     34.878550   6.703578   5.203 2.74e-07 ***
## season35     11.584826   6.703593   1.728 0.084501 .  
## season36     10.349436   6.703611   1.544 0.123175    
## season37      2.764046   6.703630   0.412 0.680259    
## season38      1.820322   6.703652   0.272 0.786072    
## season39      1.518265   6.703676   0.226 0.820907    
## season40      3.391208   6.703702   0.506 0.613142    
## season41      5.105818   6.703730   0.762 0.446590    
## season42      3.795427   6.703760   0.566 0.571504    
## season43     -2.039963   6.703792  -0.304 0.761010    
## season44     -0.875353   6.703826  -0.131 0.896157    
## season45     -2.610744   6.703862  -0.389 0.697097    
## season46     -1.054467   6.703900  -0.157 0.875071    
## season47      4.776809   6.703941   0.713 0.476423    
## season48      5.883085   6.703983   0.878 0.380556    
## season49      8.889362   6.704028   1.326 0.185378    
## season50     -1.462695   6.704074  -0.218 0.827367    
## season51     -1.506419   6.704123  -0.225 0.822293    
## season52    -12.800143   6.704174  -1.909 0.056726 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.75 on 572 degrees of freedom
## Multiple R-squared:  0.2277, Adjusted R-squared:  0.1575 
## F-statistic: 3.243 on 52 and 572 DF,  p-value: 4.909e-12
tsr_ON.6<- tslm(formula= train.ON[,6]~trend+season,data=train.ON)
summary(tsr_ON.6)
## 
## Call:
## tslm(formula = train.ON[, 6] ~ trend + season, data = train.ON)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.634  -9.496  -0.547   8.381  46.230 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 65.452219   3.788519  17.276  < 2e-16 ***
## trend        0.001627   0.002947   0.552  0.58104    
## season2     -2.029191   5.304224  -0.383  0.70219    
## season3      0.485848   5.304184   0.092  0.92705    
## season4     -5.482446   5.304146  -1.034  0.30175    
## season5      1.649260   5.304109   0.311  0.75596    
## season6      1.005966   5.304074   0.190  0.84964    
## season7     -2.462327   5.304040  -0.464  0.64266    
## season8     -0.238955   5.304008  -0.045  0.96408    
## season9      4.776085   5.303978   0.900  0.36825    
## season10    -3.675542   5.303949  -0.693  0.48860    
## season11     3.456164   5.303922   0.652  0.51491    
## season12    -6.845464   5.303897  -1.291  0.19735    
## season13     8.211243   5.303873   1.548  0.12214    
## season14     6.234615   5.303851   1.175  0.24029    
## season15    10.016321   5.303831   1.889  0.05946 .  
## season16     9.764694   5.303812   1.841  0.06613 .  
## season17     3.529734   5.303795   0.666  0.50599    
## season18     5.978107   5.303779   1.127  0.26016    
## season19     1.526479   5.303765   0.288  0.77360    
## season20     1.683185   5.303753   0.317  0.75109    
## season21     2.606558   5.303742   0.491  0.62329    
## season22     0.521598   5.303733   0.098  0.92169    
## season23     2.469970   5.303726   0.466  0.64160    
## season24     1.401677   5.303720   0.264  0.79166    
## season25    -1.633284   5.303716  -0.308  0.75823    
## season26     2.081755   5.303713   0.393  0.69483    
## season27    -0.278205   5.303713  -0.052  0.95818    
## season28     4.170168   5.303713   0.786  0.43203    
## season29     3.060207   5.303716   0.577  0.56417    
## season30     5.541913   5.303720   1.045  0.29651    
## season31    12.248619   5.303726   2.309  0.02127 *  
## season32    13.455325   5.303733   2.537  0.01145 *  
## season33     7.695365   5.303742   1.451  0.14735    
## season34    17.185404   5.303753   3.240  0.00126 ** 
## season35     6.225444   5.303765   1.174  0.24097    
## season36     0.007150   5.303779   0.001  0.99892    
## season37    -2.869477   5.303795  -0.541  0.58870    
## season38     1.345562   5.303812   0.254  0.79982    
## season39     3.260602   5.303831   0.615  0.53896    
## season40     1.700641   5.303851   0.321  0.74860    
## season41     5.157347   5.303873   0.972  0.33128    
## season42     5.322387   5.303897   1.003  0.31605    
## season43     0.479093   5.303922   0.090  0.92806    
## season44     2.460799   5.303949   0.464  0.64286    
## season45    -0.707495   5.303978  -0.133  0.89393    
## season46    -2.359122   5.304008  -0.445  0.65665    
## season47     0.014250   5.304040   0.003  0.99786    
## season48    -0.279043   5.304074  -0.053  0.95806    
## season49     1.327663   5.304109   0.250  0.80244    
## season50    -0.390631   5.304146  -0.074  0.94132    
## season51    -0.667258   5.304184  -0.126  0.89994    
## season52     1.122781   5.304224   0.212  0.83243    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.25 on 572 degrees of freedom
## Multiple R-squared:  0.1141, Adjusted R-squared:  0.03357 
## F-statistic: 1.417 on 52 and 572 DF,  p-value: 0.0329
tsr_QC<- tslm(formula = train.QC[,4]~trend+season, data = train.QC)
summary(tsr_QC)
## 
## Call:
## tslm(formula = train.QC[, 4] ~ trend + season, data = train.QC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -217.67  -75.29  -11.71   70.79  536.62 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 551.16989   31.61431  17.434  < 2e-16 ***
## trend        -0.05646    0.10552  -0.535  0.59318    
## season2      35.07650   42.14527   0.832  0.40620    
## season3      45.79129   42.14315   1.087  0.27848    
## season4      36.15608   42.14130   0.858  0.39189    
## season5       7.31254   42.13972   0.174  0.86240    
## season6       6.64400   42.13840   0.158  0.87487    
## season7     -50.34953   42.13734  -1.195  0.23348    
## season8      14.77359   42.13655   0.351  0.72623    
## season9      11.63839   42.13602   0.276  0.78266    
## season10     -0.98015   42.13575  -0.023  0.98146    
## season11     25.90964   42.13575   0.615  0.53928    
## season12     28.13277   42.13602   0.668  0.50508    
## season13     72.87256   42.13655   1.729  0.08521 .  
## season14    137.78736   42.13734   3.270  0.00126 ** 
## season15    214.11882   42.13840   5.081  8.3e-07 ***
## season16     36.40028   42.13972   0.864  0.38869    
## season17     38.13174   42.14130   0.905  0.36658    
## season18     37.24653   42.14315   0.884  0.37781    
## season19     37.31966   42.14527   0.886  0.37690    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 105.3 on 209 degrees of freedom
## Multiple R-squared:  0.2268, Adjusted R-squared:  0.1565 
## F-statistic: 3.227 on 19 and 209 DF,  p-value: 1.698e-05
tsr_QC.5<- tslm(formula= train.QC[,5]~trend+season, data=train.QC)
summary(tsr_QC.5)
## 
## Call:
## tslm(formula = train.QC[, 5] ~ trend + season, data = train.QC)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -111.157  -31.041   -1.991   24.030  239.457 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  94.67382   14.79562   6.399 1.01e-09 ***
## trend        -0.03235    0.04939  -0.655    0.513    
## season2      16.07120   19.72415   0.815    0.416    
## season3      24.02855   19.72316   1.218    0.224    
## season4      18.15256   19.72229   0.920    0.358    
## season5      19.05158   19.72155   0.966    0.335    
## season6      11.33392   19.72093   0.575    0.566    
## season7     -12.12540   19.72044  -0.615    0.539    
## season8      11.15695   19.72007   0.566    0.572    
## season9       1.23097   19.71982   0.062    0.950    
## season10     20.48831   19.71970   1.039    0.300    
## season11     16.53733   19.71970   0.839    0.403    
## season12     24.21968   19.71982   1.228    0.221    
## season13     32.67702   19.72007   1.657    0.099 .  
## season14     87.12604   19.72044   4.418 1.60e-05 ***
## season15    111.38338   19.72093   5.648 5.26e-08 ***
## season16     43.09073   19.72155   2.185    0.030 *  
## season17     10.43141   19.72229   0.529    0.597    
## season18      6.13876   19.72316   0.311    0.756    
## season19     17.57944   19.72415   0.891    0.374    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 49.26 on 209 degrees of freedom
## Multiple R-squared:  0.2696, Adjusted R-squared:  0.2032 
## F-statistic:  4.06 on 19 and 209 DF,  p-value: 1.799e-07
tsr_QC.6<- tslm(formula= train.QC[,6]~trend+season, data=train.QC)
summary(tsr_QC.6)
## 
## Call:
## tslm(formula = train.QC[, 6] ~ trend + season, data = train.QC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.377 -15.152  -2.902  11.001 301.299 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 76.07255   11.76680   6.465 7.03e-10 ***
## trend       -0.03949    0.03928  -1.006  0.31579    
## season2      3.23353   15.68641   0.206  0.83689    
## season3      3.39803   15.68563   0.217  0.82871    
## season4      4.89585   15.68494   0.312  0.75525    
## season5     -5.24798   15.68435  -0.335  0.73826    
## season6      8.01651   15.68386   0.511  0.60980    
## season7     -4.13566   15.68346  -0.264  0.79227    
## season8      2.16216   15.68317   0.138  0.89048    
## season9      2.34332   15.68297   0.149  0.88137    
## season10     4.92448   15.68287   0.314  0.75383    
## season11    21.43898   15.68287   1.367  0.17308    
## season12    12.90347   15.68297   0.823  0.41158    
## season13    49.96797   15.68317   3.186  0.00166 ** 
## season14    39.25746   15.68346   2.503  0.01308 *  
## season15    37.24695   15.68386   2.375  0.01846 *  
## season16    39.36145   15.68435   2.510  0.01285 *  
## season17     8.16761   15.68494   0.521  0.60311    
## season18     4.68210   15.68563   0.298  0.76562    
## season19     6.92159   15.68641   0.441  0.65949    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39.18 on 209 degrees of freedom
## Multiple R-squared:  0.1574, Adjusted R-squared:  0.08076 
## F-statistic: 2.054 on 19 and 209 DF,  p-value: 0.007473
tsr_BC<- tslm(formula = train.BC[,4]~trend+season, data = train.BC)
summary(tsr_BC)
## 
## Call:
## tslm(formula = train.BC[, 4] ~ trend + season, data = train.BC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -150.20  -50.08  -10.09   54.13  147.69 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 504.02295   20.04419  25.146   <2e-16 ***
## trend        -0.09096    0.07831  -1.162    0.247    
## season2      11.81394   26.49212   0.446    0.656    
## season3       5.44657   26.49050   0.206    0.837    
## season4      11.87086   26.48911   0.448    0.655    
## season5       8.03682   26.48796   0.303    0.762    
## season6     -16.64721   26.48703  -0.629    0.530    
## season7     -35.35625   26.48634  -1.335    0.184    
## season8     -10.25696   26.48587  -0.387    0.699    
## season9      -3.52433   26.48564  -0.133    0.894    
## season10     37.19997   26.48564   1.405    0.162    
## season11     -1.90907   26.48587  -0.072    0.943    
## season12     -1.98477   26.48634  -0.075    0.940    
## season13     -4.14381   26.48703  -0.156    0.876    
## season14     12.75548   26.48796   0.482    0.631    
## season15    -14.42022   26.48911  -0.544    0.587    
## season16     -7.41259   26.49050  -0.280    0.780    
## season17    -14.47996   26.49212  -0.547    0.585    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 66.16 on 187 degrees of freedom
## Multiple R-squared:  0.06281,    Adjusted R-squared:  -0.02239 
## F-statistic: 0.7373 on 17 and 187 DF,  p-value: 0.7618
tsr_BC.5<- tslm(formula = train.BC[,5]~trend+season, data = train.BC)
summary(tsr_BC.5)
## 
## Call:
## tslm(formula = train.BC[, 5] ~ trend + season, data = train.BC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.243  -9.546   0.736   8.012  36.597 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 67.15930    3.86712  17.367  < 2e-16 ***
## trend       -0.01529    0.01511  -1.012  0.31288    
## season2      2.02572    5.11112   0.396  0.69231    
## season3      4.10768    5.11080   0.804  0.42258    
## season4      9.83130    5.11054   1.924  0.05591 .  
## season5      2.26325    5.11031   0.443  0.65836    
## season6     -3.42146    5.11013  -0.670  0.50397    
## season7     -7.07284    5.11000  -1.384  0.16797    
## season8      4.31745    5.10991   0.845  0.39924    
## season9      6.68274    5.10987   1.308  0.19254    
## season10     6.42303    5.10987   1.257  0.21033    
## season11     1.80498    5.10991   0.353  0.72431    
## season12     7.81194    5.11000   1.529  0.12801    
## season13    16.29389    5.11013   3.189  0.00168 ** 
## season14    16.17585    5.11031   3.165  0.00181 ** 
## season15     7.56614    5.11054   1.480  0.14042    
## season16     3.74809    5.11080   0.733  0.46425    
## season17    -0.45329    5.11112  -0.089  0.92943    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.76 on 187 degrees of freedom
## Multiple R-squared:  0.1924, Adjusted R-squared:  0.119 
## F-statistic:  2.62 on 17 and 187 DF,  p-value: 0.0007868
tsr_BC.6<-tslm(formula = train.BC[,6]~trend+season, data=train.BC)
summary(tsr_BC.6)
## 
## Call:
## tslm(formula = train.BC[, 6] ~ trend + season, data = train.BC)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.231  -7.982  -1.225   9.265  24.632 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 64.958208   3.470864  18.715   <2e-16 ***
## trend        0.007501   0.013560   0.553    0.581    
## season2      1.350485   4.587392   0.294    0.769    
## season3     -2.140349   4.587111  -0.467    0.641    
## season4     -2.289516   4.586870  -0.499    0.618    
## season5     -3.247017   4.586670  -0.708    0.480    
## season6     -3.112850   4.586510  -0.679    0.498    
## season7     -5.662018   4.586389  -1.235    0.219    
## season8     -4.669518   4.586309  -1.018    0.310    
## season9      1.189648   4.586269   0.259    0.796    
## season10    -1.101186   4.586269  -0.240    0.811    
## season11    -4.025353   4.586309  -0.878    0.381    
## season12    -5.132854   4.586389  -1.119    0.265    
## season13    -5.148688   4.586510  -1.123    0.263    
## season14    -2.481189   4.586670  -0.541    0.589    
## season15    -6.313689   4.586870  -1.376    0.170    
## season16    -5.737856   4.587111  -1.251    0.213    
## season17    -1.912024   4.587392  -0.417    0.677    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.46 on 187 degrees of freedom
## Multiple R-squared:  0.04368,    Adjusted R-squared:  -0.04326 
## F-statistic: 0.5024 on 17 and 187 DF,  p-value: 0.9497
tsr_mid<-tslm(formula = train.mid[,4]~trend+season, data=train.mid)
summary(tsr_mid)
## 
## Call:
## tslm(formula = train.mid[, 4] ~ trend + season, data = train.mid)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -95.146 -15.649   2.233  17.974  75.647 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 537.30148    8.72014  61.616  < 2e-16 ***
## trend        -0.05290    0.02086  -2.537 0.011773 *  
## season2     -57.57935   11.80755  -4.876 1.87e-06 ***
## season3      72.70688   11.80716   6.158 2.73e-09 ***
## season4      -0.90688   11.80681  -0.077 0.938833    
## season5     -45.44565   11.80650  -3.849 0.000149 ***
## season6      66.72392   11.80622   5.652 4.11e-08 ***
## season7       1.75182   11.80598   0.148 0.882153    
## season8     -60.59528   11.80578  -5.133 5.55e-07 ***
## season9      80.69929   11.80561   6.836 5.64e-11 ***
## season10      0.06053   11.80548   0.005 0.995913    
## season11    -48.81157   11.80539  -4.135 4.78e-05 ***
## season12     71.81633   11.80534   6.083 4.12e-09 ***
## season13      0.37756   11.80532   0.032 0.974510    
## season14    -56.26953   11.80534  -4.766 3.10e-06 ***
## season15     73.47503   11.80539   6.224 1.90e-09 ***
## season16     -2.13873   11.80548  -0.181 0.856378    
## season17    -46.93583   11.80561  -3.976 9.06e-05 ***
## season18     66.04207   11.80578   5.594 5.54e-08 ***
## season19      4.41164   11.80598   0.374 0.708944    
## season20    -56.61879   11.80622  -4.796 2.71e-06 ***
## season21     80.90078   11.80650   6.852 5.11e-11 ***
## season22     -1.56299   11.80681  -0.132 0.894784    
## season23    -47.24342   11.80716  -4.001 8.19e-05 ***
## season24     67.23448   11.80755   5.694 3.30e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.49 on 264 degrees of freedom
## Multiple R-squared:  0.7694, Adjusted R-squared:  0.7484 
## F-statistic:  36.7 on 24 and 264 DF,  p-value: < 2.2e-16
tsr_mid.5<-tslm(formula = train.mid[,5]~trend+season, data=train.mid)
summary(tsr_mid.5)
## 
## Call:
## tslm(formula = train.mid[, 5] ~ trend + season, data = train.mid)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.284  -5.026  -0.936   2.799  56.624 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  86.013597   3.920819  21.938  < 2e-16 ***
## trend        -0.061738   0.009377  -6.584 2.46e-10 ***
## season2      -9.598993   5.309006  -1.808 0.071735 .  
## season3      12.079412   5.308832   2.275 0.023687 *  
## season4      -2.392183   5.308675  -0.451 0.652635    
## season5      -9.038778   5.308534  -1.703 0.089803 .  
## season6       8.247960   5.308409   1.554 0.121441    
## season7       4.368032   5.308302   0.823 0.411326    
## season8      -7.253563   5.308211  -1.366 0.172951    
## season9      19.349842   5.308136   3.645 0.000322 ***
## season10     -2.888420   5.308078  -0.544 0.586794    
## season11     -8.693348   5.308037  -1.638 0.102661    
## season12      6.701723   5.308012   1.263 0.207859    
## season13     -1.253205   5.308004  -0.236 0.813540    
## season14    -12.433133   5.308012  -2.342 0.019907 *  
## season15     13.020272   5.308037   2.453 0.014817 *  
## season16     -2.101323   5.308078  -0.396 0.692519    
## season17     -8.222918   5.308136  -1.549 0.122552    
## season18      7.838820   5.308211   1.477 0.140939    
## season19      2.050558   5.308302   0.386 0.699591    
## season20    -11.862703   5.308409  -2.235 0.026274 *  
## season21     19.340702   5.308534   3.643 0.000324 ***
## season22     -2.805893   5.308675  -0.529 0.597563    
## season23     -9.319155   5.308832  -1.755 0.080350 .  
## season24      7.425917   5.309006   1.399 0.163065    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.26 on 264 degrees of freedom
## Multiple R-squared:  0.4133, Adjusted R-squared:   0.36 
## F-statistic: 7.749 on 24 and 264 DF,  p-value: < 2.2e-16
tsr_mid.6<- tslm(formula = train.mid[,6]~trend+season, data=train.mid)
summary(tsr_mid.6)
## 
## Call:
## tslm(formula = train.mid[, 6] ~ trend + season, data = train.mid)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.571  -4.962  -1.073   3.943  38.074 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  76.024694   2.614624  29.077  < 2e-16 ***
## trend        -0.036881   0.006253  -5.898 1.12e-08 ***
## season2      -9.307616   3.540346  -2.629 0.009065 ** 
## season3      10.770932   3.540230   3.042 0.002583 ** 
## season4      -1.058854   3.540125  -0.299 0.765099    
## season5     -13.271973   3.540031  -3.749 0.000218 ***
## season6      13.964909   3.539948   3.945 0.000102 ***
## season7       2.135123   3.539876   0.603 0.546918    
## season8      -9.586329   3.539815  -2.708 0.007208 ** 
## season9      15.058886   3.539766   4.254 2.92e-05 ***
## season10     -0.304233   3.539727  -0.086 0.931573    
## season11    -13.825685   3.539699  -3.906 0.000119 ***
## season12     16.094529   3.539683   4.547 8.30e-06 ***
## season13      1.764744   3.539677   0.499 0.618504    
## season14    -10.256709   3.539683  -2.898 0.004075 ** 
## season15     14.955173   3.539699   4.225 3.29e-05 ***
## season16     -1.682946   3.539727  -0.475 0.634863    
## season17    -15.212732   3.539766  -4.298 2.43e-05 ***
## season18     14.915816   3.539815   4.214 3.45e-05 ***
## season19      3.744364   3.539876   1.058 0.291129    
## season20     -9.060421   3.539948  -2.559 0.011041 *  
## season21     17.909793   3.540031   5.059 7.89e-07 ***
## season22     -1.594992   3.540125  -0.451 0.652686    
## season23    -13.933111   3.540230  -3.936 0.000106 ***
## season24     13.703770   3.540346   3.871 0.000137 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.842 on 264 degrees of freedom
## Multiple R-squared:  0.6453, Adjusted R-squared:  0.6131 
## F-statistic: 20.01 on 24 and 264 DF,  p-value: < 2.2e-16
tsr_mar<-tslm(formula = train.mar[,4]~trend+season, data= train.mar)
summary(tsr_mar)
## 
## Call:
## tslm(formula = train.mar[, 4] ~ trend + season, data = train.mar)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -161.83  -59.24   -9.80   71.50  163.80 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 592.33249   24.12047  24.557   <2e-16 ***
## trend        -0.23634    0.08051  -2.935   0.0037 ** 
## season2      13.83730   32.15518   0.430   0.6674    
## season3      -2.63470   32.15357  -0.082   0.9348    
## season4      14.80997   32.15216   0.461   0.6455    
## season5       3.92964   32.15095   0.122   0.9028    
## season6       8.90764   32.14994   0.277   0.7820    
## season7       4.11064   32.14913   0.128   0.8984    
## season8       8.43031   32.14853   0.262   0.7934    
## season9       9.66665   32.14813   0.301   0.7639    
## season10     13.20299   32.14792   0.411   0.6817    
## season11      6.85599   32.14792   0.213   0.8313    
## season12     10.00899   32.14813   0.311   0.7559    
## season13     15.02866   32.14853   0.467   0.6406    
## season14     17.58166   32.14913   0.547   0.5850    
## season15      2.30967   32.14994   0.072   0.9428    
## season16      3.27100   32.15095   0.102   0.9191    
## season17     17.24900   32.15216   0.536   0.5922    
## season18     -3.83133   32.15357  -0.119   0.9053    
## season19      5.01334   32.15518   0.156   0.8763    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 80.31 on 209 degrees of freedom
## Multiple R-squared:  0.04617,    Adjusted R-squared:  -0.04054 
## F-statistic: 0.5325 on 19 and 209 DF,  p-value: 0.9458
tsr_mar.5<- tslm(formula = train.mar[,5]~trend+season, data = train.mar)
summary(tsr_mar.5)
## 
## Call:
## tslm(formula = train.mar[, 5] ~ trend + season, data = train.mar)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.636 -12.896  -1.701  10.654  69.180 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 92.54475    5.79665  15.965   <2e-16 ***
## trend       -0.01377    0.01935  -0.712    0.478    
## season2      0.83811    7.72756   0.108    0.914    
## season3     -4.21479    7.72717  -0.545    0.586    
## season4      4.50731    7.72683   0.583    0.560    
## season5     -1.15392    7.72654  -0.149    0.881    
## season6      0.55151    7.72630   0.071    0.943    
## season7     -0.42639    7.72611  -0.055    0.956    
## season8      2.42904    7.72596   0.314    0.754    
## season9     -1.93219    7.72587  -0.250    0.803    
## season10     3.05658    7.72582   0.396    0.693    
## season11    -1.28799    7.72582  -0.167    0.868    
## season12     0.50911    7.72587   0.066    0.948    
## season13    -1.26045    7.72596  -0.163    0.871    
## season14     1.60331    7.72611   0.208    0.836    
## season15    -4.36625    7.72630  -0.565    0.573    
## season16    -3.34415    7.72654  -0.433    0.666    
## season17    -0.01372    7.72683  -0.002    0.999    
## season18    -2.50829    7.72717  -0.325    0.746    
## season19    -1.74452    7.72756  -0.226    0.822    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.3 on 209 degrees of freedom
## Multiple R-squared:  0.01802,    Adjusted R-squared:  -0.07125 
## F-statistic: 0.2019 on 19 and 209 DF,  p-value: 0.9999
tsr_mar.6<- tslm(formula = train.mar[,6]~trend+season, data = train.mar)
summary(tsr_mar.6)
## 
## Call:
## tslm(formula = train.mar[, 6] ~ trend + season, data = train.mar)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.682 -11.005  -1.441  10.746  32.365 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 85.52787    4.21134  20.309  < 2e-16 ***
## trend       -0.06586    0.01406  -4.685 5.03e-06 ***
## season2      1.05300    5.61417   0.188    0.851    
## season3     -1.08947    5.61389  -0.194    0.846    
## season4      1.30139    5.61365   0.232    0.817    
## season5     -0.26608    5.61343  -0.047    0.962    
## season6      2.50811    5.61326   0.447    0.655    
## season7     -0.46769    5.61312  -0.083    0.934    
## season8      2.68150    5.61301   0.478    0.633    
## season9      1.60570    5.61294   0.286    0.775    
## season10     5.05489    5.61291   0.901    0.369    
## season11     1.73742    5.61291   0.310    0.757    
## season12     2.86161    5.61294   0.510    0.611    
## season13     0.56914    5.61301   0.101    0.919    
## season14     1.37667    5.61312   0.245    0.806    
## season15     0.85920    5.61326   0.153    0.878    
## season16     0.40839    5.61343   0.073    0.942    
## season17    -0.85908    5.61365  -0.153    0.879    
## season18    -0.52655    5.61389  -0.094    0.925    
## season19     0.34764    5.61417   0.062    0.951    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.02 on 209 degrees of freedom
## Multiple R-squared:  0.1061, Adjusted R-squared:  0.0248 
## F-statistic: 1.305 on 19 and 209 DF,  p-value: 0.1823
tsr_terr<- tslm(formula = train.terr[,4]~trend+season, data = train.terr)
summary(tsr_terr)
## 
## Call:
## tslm(formula = train.terr[, 4] ~ trend + season, data = train.terr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -95.528 -14.160   2.437  17.632  58.849 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 578.6782    13.5179  42.808  < 2e-16 ***
## trend        -2.3827     0.5165  -4.613 5.74e-05 ***
## season2     -30.8407    13.4214  -2.298 0.028044 *  
## season3      57.4170    13.4214   4.278 0.000152 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.52 on 33 degrees of freedom
## Multiple R-squared:  0.6526, Adjusted R-squared:  0.621 
## F-statistic: 20.66 on 3 and 33 DF,  p-value: 1.02e-07
tsr_terr.5<- tslm(formula = train.terr[,5]~trend+season, data = train.terr)
summary(tsr_terr.5)
## 
## Call:
## tslm(formula = train.terr[, 5] ~ trend + season, data = train.terr)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.2407  -9.4529   0.6109   8.8013  25.5798 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  90.1626     5.5612  16.213   <2e-16 ***
## trend        -0.2434     0.2125  -1.145   0.2603    
## season2      -8.2518     5.5215  -1.494   0.1446    
## season3      10.6082     5.5215   1.921   0.0634 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.79 on 33 degrees of freedom
## Multiple R-squared:  0.2717, Adjusted R-squared:  0.2054 
## F-statistic: 4.103 on 3 and 33 DF,  p-value: 0.01402
tsr_terr.6<-tslm(formula = train.terr[,6]~trend+season, data = train.terr)

Part 8: TSLM forecasting

test_forecast(forecast.obj = fcast.ON.4, actual = ts_ON.2[,4], test = test.ON[,4])
test_forecast(forecast.obj = fcast.ON.5, actual = ts_ON.2[,5], test = test.ON[,5])
test_forecast(forecast.obj = fcast.ON.6, actual = ts_ON.2[,6], test = test.ON[,6])
test_forecast(forecast.obj = fcast.QC.4, actual = ts_QC.2[,4], test = test.QC[,4])
test_forecast(forecast.obj = fcast.QC.5, actual = ts_QC.2[,5], test = test.QC[,5])
test_forecast(forecast.obj = fcast.QC.6, actual = ts_QC.2[,6], test = test.QC[,6])
test_forecast(forecast.obj = fcast.BC.4, actual = ts_BC.2[,4], test = test.BC[,4])
test_forecast(forecast.obj = fcast.BC.5, actual = ts_BC.2[,5], test = test.BC[,5])
test_forecast(forecast.obj = fcast.BC.6, actual = ts_BC.2[,6], test = test.BC[,6])
test_forecast(forecast.obj = fcast.mid.4, actual = ts_midwest.2[,4], test = test.mid[,4])
test_forecast(forecast.obj = fcast.mid.5, actual = ts_midwest.2[,5], test = test.mid[,5])
test_forecast(forecast.obj = fcast.mid.6, actual = ts_midwest.2[,6], test = test.mid[,6])
test_forecast(forecast.obj = fcast.mar.4, actual = ts_maritimes.2[,4], test = test.mar[,4])
test_forecast(forecast.obj = fcast.mar.5, actual = ts_maritimes.2[,5], test = test.mar[,5])
test_forecast(forecast.obj = fcast.mar.6, actual = ts_maritimes.2[,6], test = test.mar[,6])
test_forecast(forecast.obj = fcast.terr.4, actual = ts_territories.2[,4], test = test.terr[,4])
test_forecast(forecast.obj = fcast.terr.5, actual = ts_territories.2[,5], test = test.terr[,5])
test_forecast(forecast.obj = fcast.terr.6, actual = ts_territories.2[,6], test = test.terr[,6])

Part 9: ARIMA modeling and Forecasts

##ARIMA model or SARIMA
##trying non seasonal ARIMA models
##with the currently non differenced data

##staring with auto.arima
####################All cancer sites
#(auto.arima(train.ON[,4], seasonal = TRUE))
#(Arima(train.ON[,4], order = c(1,0,5), seasonal = c(1,0,1)))#model from auto.arima
#AIC=6659.09   AICc=6659.45   BIC=6703.47

#(Arima(train.ON[,4], order = c(1,1,6), seasonal = c(0,1,1)))
#(arima_ON.m<- Arima(train.ON[,4], order = c(1,1,6), seasonal = c(1,1,1)))
#AIC=6127.88   AICc=6128.27   BIC=6171.37

#(arima_ON.m<- Arima(train.ON[,4], order = c(1,1,8), seasonal = c(0,1,1)))
#AIC=6089.67   AICc=6090.14   BIC=6137.51

#arima_ON.m<- Arima(train.ON[,4], order = c(1,1,8), seasonal = c(0,1,2))####BEST AIC

arima_ON.m<- arima(train.ON[,4], order = c(1,1,6), seasonal = list(order=c(0,1,2),period=52))#aic = 5944.63

#AIC=5865.88   AICc=5866.44   BIC=5918.07

#################Lungs
#(arima_ON.5<- auto.arima(train.ON[,5], seasonal = TRUE))
#(arima_ON.5<- Arima(train.ON[,5], order = c(1,0,5), seasonal = c(0,0,1)))#model from auto.arima
#AIC=5183.45   AICc=5183.75   BIC=5223.39

#(arima_ON.5.m<- Arima(train.ON[,5], order = c(1,0,5), seasonal = c(0,1,1)))
# AIC=4791.34   AICc=4791.6   BIC=4826.15

#(arima_ON.5.m<- Arima(train.ON[,5], order = c(1,0,7), seasonal = c(0,1,1))) 
#AIC=4742.87   AICc=4743.26   BIC=4786.38

#(arima_ON.5.m<- Arima(train.ON[,5], order = c(1,0,8), seasonal = c(0,1,2)))
#AIC=4506.99   AICc=4507.55   BIC=4559.2

#arima_ON.5.m<- Arima(train.ON[,5], order = c(1,0,9), seasonal = c(0,1,2))##BEST AIC
#AIC=4497.12   AICc=4497.77   BIC=4553.68

arima_ON.5.m<- arima(train.ON[,5], order = c(1,0,8), seasonal = list(order=c(0,1,2),period=52))
#aic = 4506.99

#############################COLON
#(arima_ON.6<- auto.arima(train.ON[,6], seasonal = FALSE)) #False higher AIC than True
#(arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,5), seasonal = c(0,0,0)))#model from auto.arima
#AIC=4753.36   AICc=4753.59   BIC=4788.86

#(arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,6), seasonal = c(0,0,0)))
#AIC=4689.1   AICc=4689.39   BIC=4729.04

#(arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,5), seasonal = c(0,1,1)))
#AIC=4437.09   AICc=4437.35   BIC=4471.9

#(arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,6), seasonal = c(0,1,1)))
#AIC=4352.96   AICc=4353.28   BIC=4392.12

#(arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,7), seasonal = c(0,1,1)))
#AIC=4335.71   AICc=4336.1   BIC=4379.22

#arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,8), seasonal = list(order=c(0,1,2), period=52))##BEST AIC

#AIC=4117.67   AICc=4118.23   BIC=4169.88

arima_ON.6.m<- arima(train.ON[,6], order = c(1,0,6), seasonal = list(order=c(0,1,2), period=52))
#aic = 4150.05
test_forecast(forecast.obj = fcast_ON.4, actual = ts_ON.2[,4], test = test.ON[,4])
test_forecast(forecast.obj = fcast_ON.5, actual = ts_ON.2[,5], test = test.ON[,5])
test_forecast(forecast.obj = fcast_ON.6, actual = ts_ON.2[,6], test = test.ON[,6])
#######################All cancer sites
#(arima_QC<- auto.arima(train.QC[,4], seasonal = TRUE))
#(arima_QC<- Arima(train.QC[,4], order = c(1,0,1), seasonal = c(0,0,1)))##auto.arima model
#AIC=2813.12   AICc=2813.38   BIC=2830.28

#(arima_QC.m<- Arima(train.QC[,4], order = c(1,0,3), seasonal = c(0,0,1)))
#AIC=2799.16   AICc=2799.66   BIC=2823.19

#(arima_QC.m<- Arima(train.QC[,4], order = c(1,0,3), seasonal = c(0,1,1)))
#AIC=2570.48   AICc=2570.89   BIC=2590.56

#(arima_QC.m<- Arima(train.QC[,4], order = c(1,1,4), seasonal = c(0,1,1)))
#AIC=2568.64   AICc=2569.2   BIC=2592.04

#arima_QC.m<- Arima(train.QC[,4], order = c(1,1,4), seasonal = c(0,1,2))##BEST AIC
#AIC=2526.05   AICc=2526.77   BIC=2552.79

arima_QC.m<- arima(train.QC[,4], order = c(1,1,4), seasonal = list(order =c(0,1,2),period=19))

##########################LUNGS
#(arima_QC.5<- auto.arima(train.QC[,5], seasonal = TRUE))
#(arima_QC.5<-Arima(train.QC[,5], order = c(0,0,1), seasonal = c(0,0,2)))##auto.arima model
#AIC=2407.76   AICc=2408.02   BIC=2424.92

#(arima_QC.5.m<-Arima(train.QC[,5], order = c(1,0,4), seasonal = c(0,1,2)))
#AIC=2190.36   AICc=2191.08   BIC=2217.14

#(arima_QC.5.m<-Arima(train.QC[,5], order = c(1,1,4), seasonal = c(0,1,2)))
#AIC=2188.35   AICc=2189.07   BIC=2215.09

#(arima_QC.5.m<-Arima(train.QC[,5], order = c(1,1,4), seasonal = c(0,1,3)))
#AIC=2178.95   AICc=2179.85   BIC=2209.03

#(arima_QC.5.m<-Arima(train.QC[,5], order = c(1,1,4), seasonal = c(0,1,4)))
#AIC=2125.93   AICc=2127.04   BIC=2159.36
#arima_QC.5.m<-Arima(train.QC[,5], order = c(1,1,4), seasonal = c(0,1,5))
#AIC=2104.38   AICc=2105.72   BIC=2141.14

arima_QC.5.m<- arima(train.QC[,5], order = c(1,1,4), seasonal = list(order =c(0,1,5),period=19))
#aic = 2104.38

############################# Colon
#(arima_QC.6<- auto.arima(train.QC[,6], seasonal = TRUE))
#(arima_QC.6.m<-Arima(train.QC[,6], order = c(0,0,3), seasonal = c(0,0,1)))##auto.arima model
#AIC=2258.1   AICc=2258.48   BIC=2278.7

#(arima_QC.6.m<-Arima(train.QC[,6], order = c(4,0,3), seasonal = c(0,0,1)))
#AIC=2250.51   AICc=2251.52   BIC=2284.85

#(arima_QC.6.m<-Arima(train.QC[,6], order = c(1,0,3), seasonal = c(0,1,1)))
#AIC=2114.69   AICc=2115.11   BIC=2134.78

#(arima_QC.6.m<-Arima(train.QC[,6], order = c(1,0,3), seasonal = c(0,2,1)))
#AIC=2110.41   AICc=2110.86   BIC=2129.92

#(arima_QC.6.m<-Arima(train.QC[,6], order = c(1,0,3), seasonal = c(0,2,4)))
#AIC=2005.74   AICc=2006.74   BIC=2035.01

#arima_QC.6.m<-Arima(train.QC[,6], order = c(1,0,3), seasonal = c(0,2,5))##BEST AIC
#AIC=1980.22   AICc=1981.44   BIC=2012.74

arima_QC.6.m<- arima(train.QC[,6], order = c(1,0,3), seasonal = list(order =c(0,2,5),period=19))
#aic = 1980.22
test_forecast(forecast.obj = fcast_QC.4, actual = ts_QC.2[,4], test = test.QC[,4])
test_forecast(forecast.obj = fcast_QC.5, actual = ts_QC.2[,5], test = test.QC[,5])
test_forecast(forecast.obj = fcast_QC.6, actual = ts_QC.2[,6], test = test.QC[,6])
##############################ALL cancer sites
#(arima_BC<-auto.arima(train.BC[,4], seasonal = TRUE))
#(arima_BC<- Arima(train.BC[,4], order = c(1,0,5), seasonal = c(0,0,1)))#auto.arima model
#AIC=2187.68   AICc=2188.6   BIC=2217.58

#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,5), seasonal = c(0,1,1)))
#AIC=2041.8   AICc=2042.6   BIC=2067.69

#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,6), seasonal = c(0,1,1)))
#AIC=2020.01   AICc=2021.02   BIC=2049.14

#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,2)))
#AIC=1917.45   AICc=1919.54   BIC=1959.52

#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,3)))
#AIC=1906.1   AICc=1908.53   BIC=1951.41

#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,4)))
#AIC=1847.06   AICc=1849.85   BIC=1895.61

#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,5)))
#AIC=1823.34   AICc=1826.52   BIC=1875.12

#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,6)))
#AIC=1815.42   AICc=1819.02   BIC=1870.44

#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,7)))
#AIC=1784.21   AICc=1788.26   BIC=1842.46

#arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,8))
#AIC=1779.17   AICc=1783.69   BIC=1840.66
##takes forever to run
arima_BC.m<- arima(train.BC[,4], order = c(1,0,8), seasonal = list(order=c(0,1,6),period=17))
#aic = 1831.82

#############################################LUNGS
(arima_BC.<-auto.arima(train.BC[,5], seasonal = FALSE))##better than seasonal=TRUE
## Series: train.BC[, 5] 
## ARIMA(4,0,2) with non-zero mean 
## 
## Coefficients:
##           ar1      ar2     ar3      ar4     ma1     ma2     mean
##       -0.2212  -0.5840  0.3718  -0.3439  0.6443  0.9447  70.1662
## s.e.   0.0686   0.0633  0.0632   0.0679  0.0321  0.0347   0.9781
## 
## sigma^2 = 95.71:  log likelihood = -757.62
## AIC=1531.24   AICc=1531.98   BIC=1557.82
#AIC=1531.24   AICc=1531.98   BIC=1557.82

#(arima_BC.5.m<- Arima(train.BC[,5], order = c(2,0,4), seasonal = c(0,1,1)))
#AIC=1358.72   AICc=1359.53   BIC=1384.62

#(arima_BC.5.m<- Arima(train.BC[,5], order = c(2,0,4), seasonal = c(1,1,1)))
#AIC=1334.85   AICc=1335.86   BIC=1363.98

#(arima_BC.5.m<- Arima(train.BC[,5], order = c(2,0,4), seasonal = c(1,1,4)))
#AIC=1242.85   AICc=1244.64   BIC=1281.69

#arima_BC.5.m<- Arima(train.BC[,5], order = c(2,0,4), seasonal = c(2,1,4))###BEST AIC
#AIC=1148.16   AICc=1150.25   BIC=1190.23

arima_BC.5.m<- arima(train.BC[,5], order = c(2,0,4), seasonal = list(order=c(2,1,4),period=17))
#aic = 1148.16
################################# Colon
#(arima_BC.6<-auto.arima(train.BC[,6], seasonal = TRUE))
#(arima_BC.6.m<- Arima(train.BC[,6], order = c(1,0,5), seasonal = c(0,0,2)))#auto.arima model
#AIC=1444.7   AICc=1445.83   BIC=1477.93

#(arima_BC.6.m<- Arima(train.BC[,6], order = c(1,0,5), seasonal = c(0,1,2)))
#AIC=1361.42   AICc=1362.43   BIC=1390.55

#(arima_BC.6.m<- Arima(train.BC[,6], order = c(1,0,5), seasonal = c(0,1,4)))
#AIC=1302.51   AICc=1304.01   BIC=1338.11

arima_BC.6.m<- arima(train.BC[,6], order = c(1,0,6), seasonal = list(order=c(0,1,4),perid=17))##BEST AIC
#aic = 1285
test_forecast(forecast.obj = fcast_BC.4, actual = ts_BC.2[,4], test = test.BC[,4])
test_forecast(forecast.obj = fcast_BC.5, actual = ts_BC.2[,5], test = test.BC[,5])
test_forecast(forecast.obj = fcast_BC.6, actual = ts_BC.2[,6], test = test.BC[,6])
##################################ALL cancer sites
#(arima_mid<- auto.arima(train.mid[,4], seasonal= TRUE))

#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,2), seasonal = c(1,1,0)))#auto.arima model
#AIC=2581.3   AICc=2581.62   BIC=2602.77

#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,1,0)))
#AIC=2577.45   AICc=2577.88   BIC=2602.51

#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,1,1)))
#AIC=2555.31   AICc=2555.87   BIC=2583.95

#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,2,1)))
#AIC=2442.21   AICc=2442.83   BIC=2470.09

#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,2,3)))
#AIC=2419.11   AICc=2420.07   BIC=2453.96

#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,2,5)))
#AIC=2412.21   AICc=2413.58   BIC=2454.02

#arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,2,6))##no noticeable improvement from 6->8 
#AIC=2405.48   AICc=2407.08   BIC=2450.78

arima_mid.m<- arima(train.mid[,4], order = c(2,0,3), seasonal = list(order=c(1,2,6),period=24))
#aic = 2405.48


###########################################LUNGS
#(arima_mid.5<- auto.arima(train.mid[,5], seasonal= TRUE))
#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,1,2), seasonal = c(2,0,1)))##auto.arima model
#AIC=2239.1   AICc=2240.24   BIC=2283.06

#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,1,3), seasonal = c(2,0,1)))
#AIC=2169.99   AICc=2171.12   BIC=2213.94

#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,1,3), seasonal = c(2,1,1)))
#AIC=2022.71   AICc=2023.95   BIC=2065.62

#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,1,3), seasonal = c(2,1,2)))
#AIC=2020.75   AICc=2022.2   BIC=2067.23

#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,1,3), seasonal = c(2,2,2)))
#AIC=2000.26   AICc=2001.87   BIC=2045.51

#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,2,3), seasonal = c(2,2,2)))
#AIC=1991.14   AICc=1992.75   BIC=2036.33 ##also solved warning in previous version 

#arima_mid.5.m<- Arima(train.mid[,5], order = c(5,2,4), seasonal = c(2,2,2))##BEST AIC
#AIC=1971.84   AICc=1973.72   BIC=2020.52

arima_mid.5.m<- arima(train.mid[,5], order = c(5,2,4), seasonal = list(order=c(2,2,2),period=24))

########################################## Colon
#(arima_mid.6<- auto.arima(train.mid[,6], seasonal= TRUE))
#(arima_mid.5.m<- Arima(train.mid[,6], order = c(2,0,0), seasonal = c(2,1,1))) ##auto.arima model
#AIC=1904.11   AICc=1904.55   BIC=1929.17

#(arima_mid.5.m<- Arima(train.mid[,6], order = c(2,0,2), seasonal = c(2,1,1)))
#AIC=1898.68   AICc=1899.24   BIC=1927.32

#(arima_mid.5.m<- Arima(train.mid[,6], order = c(2,1,2), seasonal = c(2,1,1)))
#AIC=1891.71   AICc=1892.28   BIC=1920.32

#(arima_mid.5.m<- Arima(train.mid[,6], order = c(2,1,2), seasonal = c(2,1,2)))
#AIC=1887.73   AICc=1888.44   BIC=1919.92

#(arima_mid.5.m<- Arima(train.mid[,6], order = c(2,1,2), seasonal = c(2,2,2)))
#AIC=1851.87   AICc=1852.65   BIC=1883.19

#arima_mid.6.m<- Arima(train.mid[,6], order = c(2,1,2), seasonal = c(2,3,2)) ##BEST AIC
#AIC=1840.36   AICc=1841.23   BIC=1870.74

arima_mid.6.m<- arima(train.mid[,6], order = c(2,1,2), seasonal = list(order=c(2,3,2),period=24))
test_forecast(forecast.obj = fcast_mid.4, actual = ts_midwest.2[,4], test = test.mid[,4])
test_forecast(forecast.obj = fcast_mid.5, actual = ts_midwest.2[,5], test = test.mid[,5])
test_forecast(forecast.obj = fcast_mid.6, actual = ts_midwest.2[,6], test = test.mid[,6])
##forecasts estimate higher incidence than the actual values
#(arima_mar<- auto.arima(train.mar[,4], seasonal = TRUE))
#(arima_mar.m <- Arima(train.mar[,4], order = c(4,1,0) ,seasonal = c(0,0,2))) #auto.arima model
#AIC=2201.95   AICc=2202.61   BIC=2229.38

#(arima_mar.m <- Arima(train.mar[,4], order = c(4,1,1) ,seasonal = c(0,0,2)))
#AIC=2198.94   AICc=2199.6   BIC=2226.37

#(arima_mar.m <- Arima(train.mar[,4], order = c(4,1,3) ,seasonal = c(0,0,2)))
#AIC=2131.73   AICc=2132.74   BIC=2166.02

#(arima_mar.m <- Arima(train.mar[,4], order = c(4,1,3) ,seasonal = c(0,1,2)))
#AIC=2002.72   AICc=2003.83   BIC=2036.14

#(arima_mar.m <- Arima(train.mar[,4], order = c(4,1,3) ,seasonal = c(0,2,2)))
#AIC=1916.7   AICc=1917.93   BIC=1949.17

#arima_mar.m <- Arima(train.mar[,4], order = c(4,1,3) ,seasonal = c(0,2,3))## BEST AIC
#AIC=1915.16   AICc=1916.65   BIC=1950.88
arima_mar.m <- arima(train.mar[,4], order = c(4,1,3) ,seasonal = list(order=c(0,2,3),period=19))

######################################LUNGS
#(arima_mar.5<- auto.arima(train.mar[,5], seasonal = TRUE))
#(arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,1), seasonal = c(0,0,2)))##auto.arima model
#AIC=1663.09   AICc=1664.1   BIC=1697.42

#(arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,1), seasonal = c(0,1,2)))
#AIC=1581.21   AICc=1582.11   BIC=1611.33

#(arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,2), seasonal = c(0,1,2)))
#AIC=1558.89   AICc=1559.99   BIC=1592.36

#(arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,2), seasonal = c(0,2,2)))
#AIC=1516.86   AICc=1518.08   BIC=1549.38

#(arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,2), seasonal = c(0,3,2)))
#AIC=1474.06   AICc=1475.43   BIC=1505.54

#arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,3), seasonal = c(0,3,2))##BEST AIC
#AIC=1472.04   AICc=1473.69   BIC=1506.66

arima_mar.5.m<- arima(train.mar[,5], order = c(5,0,3), seasonal = list(order=c(0,3,2),period=19))

############################### Colon
#(arima_mar.6<- auto.arima(train.mar[,6], seasonal = TRUE))
#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,5), seasonal = c(0,0,1))) #auto.arima model
#AIC=1668.67   AICc=1669.33   BIC=1696.1

#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,5), seasonal = c(0,1,1)))
#AIC=1596.2   AICc=1596.75   BIC=1619.59

#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,6), seasonal = c(0,1,1)))
#AIC=1566.62   AICc=1567.34   BIC=1593.36

#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,10), seasonal = c(0,1,4)))
#AIC=1497.4   AICc=1499.89   BIC=1547.53

#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,10), seasonal = c(0,1,5)))
#AIC=1491.06   AICc=1493.89   BIC=1544.54

#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,10), seasonal = c(0,2,5)))
#AIC=1471.18   AICc=1474.32   BIC=1523.13

#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,10), seasonal = c(0,3,5)))
#AIC=1469.12   AICc=1472.65   BIC=1519.38

#arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,10), seasonal = c(0,3,6))##BEST AIC
#AIC=1462.55   AICc=1466.55   BIC=1515.95

arima_mar.6.m<- arima(train.mar[,6], order = c(1,1,8), seasonal = list(order= c(0,3,6),period=19))

#aic = 1477.06
test_forecast(forecast.obj = fcast_mar.4, actual = ts_maritimes.2[,4], test = test.mar[,4])
test_forecast(forecast.obj = fcast_mar.5, actual = ts_maritimes.2[,5], test = test.mar[,5])
test_forecast(forecast.obj = fcast_mar.6, actual = ts_maritimes.2[,6], test = test.mar[,6])
#####################ALL cancer sites
#(arima_terr<-auto.arima(train.terr[,4], seasonal = TRUE, stepwise = FALSE, approximation = FALSE))

arima_terr.m<- arima(train.terr[,4], order = c(0,0,0), seasonal = list(order=c(0,1,0),period=3))##auto.arima model
#AIC=347.42   AICc=347.55   BIC=348.95
##BEST AIC

############################### LUNGS
#(arima_terr.5<-auto.arima(train.terr[,5], seasonal = TRUE))

arima_terr.5.m<- arima(train.terr[,5], order= c(0,0,0), seasonal = list(order=c(2,0,0),period=3))##auto.arima model
#AIC=283.85   AICc=285.1   BIC=290.29
##BEST AIC

################################# COlon
#(arima_terr.6<-auto.arima(train.terr[,6], seasonal = TRUE))
#>  ARIMA(2,1,0) 
#>  AIC=346.54   AICc=347.29   BIC=351.29
#(arima_terr.6.m<- Arima(train.terr[,6], order= c(2,1,1), seasonal = c(0,0,0)))
#AIC=348.5   AICc=289.19   BIC=294.24

arima_terr.6.m<- arima(train.terr[,6], order= c(2,1,1), seasonal = list(order=c(0,1,0),period=3))
#AIC=322.89   AICc=267.86   BIC=272.42
test_forecast(forecast.obj = fcast_terr.4, actual = ts_territories.2[,4], test = test.terr[,4])
test_forecast(forecast.obj = fcast_terr.5, actual = ts_territories.2[,5], test = test.terr[,5])
test_forecast(forecast.obj = fcast_terr.6, actual = ts_territories.2[,6], test = test.terr[,6])
check_res(arima_ON.m)
check_res(arima_ON.5.m)
check_res(arima_ON.6.m)
check_res(arima_QC.m)
check_res(arima_QC.5.m)
check_res(arima_QC.6.m)
check_res(arima_BC.m)
check_res(arima_BC.5.m)
check_res(arima_BC.6.m)
check_res(arima_mid.m)
check_res(arima_mid.5.m)
check_res(arima_mid.6.m)
check_res(arima_mar.m)
check_res(arima_mar.5.m)
check_res(arima_mar.6.m)
check_res(arima_terr.m)
check_res(arima_terr.5.m)
check_res(arima_terr.6.m)

Part 10: Effectiveness - Accuracy Measures

#####################ARIMA
print("Ontario ARIMA models")
## [1] "Ontario ARIMA models"
accuracy(fcast_ON.4, test.ON[,4])
##                      ME     RMSE      MAE         MPE     MAPE      MASE
## Training set -0.7342033 32.45072 25.10075 -0.70603094 4.891489 0.2526362
## Test set      6.3203872 53.99158 45.75886  0.03411064 8.663069 0.4605577
##                     ACF1 Theil's U
## Training set -0.09777807        NA
## Test set     -0.37092812 0.4930353
accuracy(fcast_ON.5, test.ON[,5])
##                      ME      RMSE      MAE       MPE      MAPE      MASE
## Training set  0.2171178  9.235597 6.750731 -1.725505  9.915781 0.3098923
## Test set     -0.7523922 10.092238 7.897227 -3.375201 11.847799 0.3625222
##                      ACF1 Theil's U
## Training set -0.008318187        NA
## Test set      0.065808248 0.5861256
accuracy(fcast_ON.6, test.ON[,6])
##                      ME     RMSE      MAE        MPE      MAPE      MASE
## Training set  0.1858216 6.819994 5.167771 -0.9890724  7.843747 0.2872683
## Test set     -1.4478105 9.954216 7.912281 -4.4579768 12.520854 0.4398313
##                     ACF1 Theil's U
## Training set -0.02687924        NA
## Test set     -0.36120760  0.477058
################TSLM
print("Ontario TSLM models")
## [1] "Ontario TSLM models"
accuracy(fcast.ON.4, test.ON[,4])
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -3.544617e-15 67.44486 56.59353 -1.650801 10.92957 0.5696074
## Test set     -2.089438e+00 67.98010 57.51472 -2.036162 11.02530 0.5788791
##                    ACF1 Theil's U
## Training set -0.2916615        NA
## Test set     -0.2922284  0.628614
accuracy(fcast.ON.5, test.ON[,5])
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  4.089062e-16 16.01967 12.62884 -5.296708 19.01862 0.5797269
## Test set     -1.224907e+00 13.31527 10.58071 -5.123035 16.09178 0.4857075
##                     ACF1 Theil's U
## Training set 0.006094595        NA
## Test set     0.121342684 0.8063011
accuracy(fcast.ON.6, test.ON[,6])
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  1.588854e-16 12.67448 10.24572 -3.410354 15.48518 0.5695433
## Test set     -1.890459e+00 12.62819 10.45913 -6.331039 16.62284 0.5814069
##                   ACF1 Theil's U
## Training set -0.187263        NA
## Test set     -0.308252 0.6236434
#####################ARIMA
print("Quebec ARIMA models")
## [1] "Quebec ARIMA models"
accuracy(fcast_QC.4, test.QC[,4])
##                     ME     RMSE      MAE         MPE     MAPE      MASE
## Training set -3.402105 81.03498 58.78771 -2.11893804 9.923888 0.4261876
## Test set      5.704239 69.29902 56.19228 -0.05398666 9.875326 0.4073718
##                     ACF1 Theil's U
## Training set -0.01218758        NA
## Test set     -0.16173147 0.5462605
accuracy(fcast_QC.5, test.QC[,5])
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.7977578 24.50994 16.87430      -Inf      Inf 0.2848762
## Test set     -1.0018107 21.84940 16.00479 -3.434945 15.49545 0.2701968
##                      ACF1 Theil's U
## Training set -0.004871119        NA
## Test set      0.140461623  0.428896
accuracy(fcast_QC.6, test.QC[,6])
##                     ME     RMSE       MAE       MPE     MAPE      MASE
## Training set -1.380253 25.09385 13.542098 -4.913560 15.87571 0.3871488
## Test set     -2.787314 11.30498  8.680159 -5.462049 12.75794 0.2481531
##                      ACF1 Theil's U
## Training set -0.002606181        NA
## Test set     -0.103577339 0.5255439
################TSLM
print("Quebec TSLM models")
## [1] "Quebec TSLM models"
accuracy(fcast.QC.4, test.QC[,4])
##                         ME      RMSE      MAE       MPE     MAPE      MASE
## Training set -2.978838e-15 100.55372 76.58136 -2.482115 12.97538 0.5551845
## Test set     -7.617443e+00  78.51699 63.78263 -2.726196 11.39109 0.4623988
##                     ACF1 Theil's U
## Training set  0.03674577        NA
## Test set     -0.16572215  0.616928
accuracy(fcast.QC.5, test.QC[,5])
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  6.478510e-17 47.05953 33.27729      -Inf      Inf 0.5617955
## Test set     -6.473464e+00 35.38083 27.75142 -12.19978 28.24806 0.4685064
##                    ACF1 Theil's U
## Training set 0.26967228        NA
## Test set     0.07461657 0.7020082
accuracy(fcast.QC.6, test.QC[,6])
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  8.682651e-16 37.42596 20.82961  -9.129001 23.57742 0.5954883
## Test set     -7.012978e+00 21.82977 16.59623 -13.103774 24.36062 0.4744620
##                   ACF1 Theil's U
## Training set 0.2870133        NA
## Test set     0.2124646 0.9684529
#####################ARIMA
print("BC ARIMA models")
## [1] "BC ARIMA models"
accuracy(fcast_BC.4, test.BC[,4])
##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -1.361911 18.03714 13.28185 -0.5247616 2.780829 0.1419872
## Test set     15.131115 38.39510 29.96387  2.2237066 5.722222 0.3203234
##                      ACF1 Theil's U
## Training set  0.006225926        NA
## Test set     -0.224283986 0.3725037
accuracy(fcast_BC.5, test.BC[,5])
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.2626233 3.532935 2.614178 -0.6773540 3.806916 0.1508118
## Test set      0.5055518 5.768628 3.911727 -0.1145212 5.622419 0.2256673
##                     ACF1 Theil's U
## Training set -0.03900797        NA
## Test set      0.35746108 0.3048858
accuracy(fcast_BC.6, test.BC[,6])
##                      ME     RMSE      MAE        MPE      MAPE      MASE
## Training set 0.49799296 5.060612 3.858002 -0.2063229  6.245824 0.2406451
## Test set     0.04078948 7.955122 6.564351 -1.7535620 10.635308 0.4094552
##                     ACF1 Theil's U
## Training set -0.01352428        NA
## Test set     -0.07725039 0.4543273
################TSLM
print("BC TSLM models")
## [1] "BC TSLM models"
accuracy(fcast.BC.4, test.BC[,4])
##                        ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 2.216742e-15 63.18969 53.03207 -1.635163 10.85632 0.5669299
## Test set     2.219328e+01 68.35999 54.26209  2.855463 10.38967 0.5800792
##                    ACF1 Theil's U
## Training set -0.2793209        NA
## Test set     -0.2879627 0.6722001
accuracy(fcast.BC.5, test.BC[,5])
##                         ME     RMSE       MAE       MPE     MAPE      MASE
## Training set -3.462677e-17 12.19117  9.984786 -3.129079 14.88163 0.5760218
## Test set      3.083233e+00 13.23183 10.216855  1.274293 14.25824 0.5894098
##                     ACF1 Theil's U
## Training set 0.008420675        NA
## Test set     0.337046593 0.7132829
accuracy(fcast.BC.6, test.BC[,6])
##                        ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 1.385071e-16 10.94196 9.155316 -3.194198 15.25255 0.5710682
## Test set     3.837316e-01 10.72559 9.187866 -2.197730 15.01381 0.5730985
##                    ACF1 Theil's U
## Training set -0.2001443        NA
## Test set     -0.2110036  0.612524
#####################ARIMA
print("Midwest ARIMA models")
## [1] "Midwest ARIMA models"
accuracy(fcast_mid.4, test.mid[,4])
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set   2.846237 24.62107 17.02520  0.3594439 3.180817 0.5353263
## Test set     -13.350272 24.62355 21.39882 -2.4703575 4.015196 0.6728470
##                      ACF1 Theil's U
## Training set -0.008567216        NA
## Test set      0.261144498 0.2343431
accuracy(fcast_mid.5, test.mid[,5])
##                       ME     RMSE      MAE        MPE      MAPE      MASE
## Training set  0.04218503 9.173924 6.403709 -0.5009628  8.460447 0.4780637
## Test set     -6.69998552 9.846109 7.976579 -9.9995206 11.864569 0.5954851
##                     ACF1 Theil's U
## Training set -0.02733384        NA
## Test set      0.19375542 0.7186801
accuracy(fcast_mid.6, test.mid[,6])
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.2642139 9.413829 6.084825 -0.6310913 8.881973 0.6569747
## Test set      1.8358503 7.256528 5.450974  2.8121503 8.141033 0.5885382
##                      ACF1 Theil's U
## Training set  0.004247999        NA
## Test set     -0.351646059  0.425444
################TSLM
print("Midwest TSLM models")
## [1] "Midwest TSLM models"
accuracy(fcast.mid.4, test.mid[,4])
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -3.148720e-15 28.18532 21.47508 -0.2770566 4.035879 0.6752449
## Test set     -6.186426e-01 17.76653 14.92947 -0.3653245 2.907564 0.4694300
##                   ACF1 Theil's U
## Training set 0.3992370        NA
## Test set     0.3140968 0.1658009
accuracy(fcast.mid.5, test.mid[,5])
##                        ME      RMSE      MAE       MPE      MAPE      MASE
## Training set -9.89025e-17 12.672916 8.035323 -2.151047 10.050690 0.5998706
## Test set      2.15347e+00  6.528972 5.472111  2.648482  7.955706 0.4085160
##                   ACF1 Theil's U
## Training set 0.4113309        NA
## Test set     0.4080632 0.4293385
accuracy(fcast.mid.6, test.mid[,6])
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -1.981194e-16 8.451017 6.096435 -1.317389 8.470573 0.6582282
## Test set     -2.438379e+00 4.659719 3.558358 -3.995623 5.784766 0.3841936
##                    ACF1 Theil's U
## Training set 0.36855627        NA
## Test set     0.08218148 0.2675836
#####################ARIMA
print("Maritimes ARIMA models")
## [1] "Maritimes ARIMA models"
accuracy(fcast_mar.4, test.mar[,4])
##                      ME     RMSE      MAE         MPE     MAPE      MASE
## Training set  0.3748883 26.79171 19.22015 -0.03847801 3.467458 0.1669458
## Test set     -1.7818321 45.60905 38.05424 -1.11150570 7.069351 0.3305383
##                     ACF1 Theil's U
## Training set -0.00384057        NA
## Test set     -0.06842116 0.2647759
accuracy(fcast_mar.5, test.mar[,5])
##                      ME     RMSE       MAE         MPE      MAPE      MASE
## Training set  0.6745681 10.36877  7.255549   0.3295996  8.282918 0.2955188
## Test set     -6.4261880 32.74793 26.085511 -16.0184045 32.872583 1.0624638
##                      ACF1 Theil's U
## Training set  0.006177642        NA
## Test set     -0.384584439 0.5029747
accuracy(fcast_mar.6, test.mar[,6])
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set  0.2048485  9.438797  6.581651 -0.560942  8.84405 0.3244095
## Test set     -2.7075505 14.791491 11.519138 -7.199916 18.14988 0.5677782
##                     ACF1 Theil's U
## Training set  0.02110998        NA
## Test set     -0.18235521 0.6367498
################TSLM
print("Maritimes TSLM models")
## [1] "Maritimes TSLM models"
accuracy(fcast.mar.4, test.mar[,4])
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -7.199985e-15 76.71853 63.82659 -1.792592 11.27545 0.5543963
## Test set      5.181360e+00 95.28617 80.87036 -1.992469 14.96103 0.7024381
##                    ACF1 Theil's U
## Training set -0.4244026        NA
## Test set     -0.3925648 0.6155316
accuracy(fcast.mar.5, test.mar[,5])
##                         ME     RMSE      MAE       MPE     MAPE     MASE
## Training set -4.956447e-16 18.43706 14.41968 -4.159001 16.59026 0.587314
## Test set      5.095194e+00 31.80227 25.73884 -6.798168 30.52044 1.048344
##                    ACF1 Theil's U
## Training set -0.3786891        NA
## Test set     -0.4429198 0.6071385
accuracy(fcast.mar.6, test.mar[,6])
##                         ME     RMSE       MAE       MPE     MAPE      MASE
## Training set -6.828106e-16 13.39477 11.189707 -2.878422 14.53431 0.5515405
## Test set     -8.090630e-01 12.18777  9.636779 -4.731806 15.24833 0.4749968
##                    ACF1 Theil's U
## Training set -0.3605622        NA
## Test set     -0.4031272  0.551602
#####################ARIMA
print("Territories ARIMA models")
## [1] "Territories ARIMA models"
accuracy(fcast_terr.4, test.terr[,4])
##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -6.120165 37.28218 28.90686 -1.3954971 5.494962 0.9204283
## Test set      3.516667 38.77106 35.61667  0.0400974 6.799237 1.1340763
##                     ACF1 Theil's U
## Training set  0.09744538        NA
## Test set     -0.23667890 0.3825451
accuracy(fcast_terr.5, test.terr[,5])
##                      ME      RMSE      MAE         MPE     MAPE      MASE
## Training set  0.4571086  9.578816  7.40855  -0.7693159  8.82341 0.7928571
## Test set     -5.4459145 17.358887 12.83183 -12.0646412 19.72843 1.3732517
##                      ACF1 Theil's U
## Training set -0.003140624        NA
## Test set     -0.486631480 0.5206454
accuracy(fcast_terr.6, test.terr[,6])
##                      ME      RMSE      MAE       MPE      MAPE      MASE
## Training set -0.9890756 25.717435 17.98194 -1.559293 15.993746 0.9174458
## Test set     -5.6941441  9.564309  6.14915 -7.593012  8.048197 0.3137321
##                      ACF1 Theil's U
## Training set -0.001604024        NA
## Test set     -0.442237657 0.5946297
################TSLM
print("Territories TSLM models")
## [1] "Territories TSLM models"
accuracy(fcast.terr.4, test.terr[,4])
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -7.68135e-15 31.65667 23.92088 -0.3485376 4.472621 0.7616687
## Test set      2.10440e+01 44.34700 33.14954  3.4609298 5.963205 1.0555200
##                     ACF1 Theil's U
## Training set -0.04934423        NA
## Test set     -0.12678814 0.4523574
accuracy(fcast.terr.5, test.terr[,5])
##                         ME     RMSE       MAE       MPE     MAPE     MASE
## Training set -5.762974e-16 13.02349 10.732952 -2.228015 12.54838 1.148632
## Test set     -2.724679e+00 10.63808  7.909188 -6.713301 12.02198 0.846435
##                    ACF1 Theil's U
## Training set -0.1829893        NA
## Test set     -0.3429590  0.329316
accuracy(fcast.terr.6, test.terr[,6])
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  0.000000 28.99398 20.89838 -4.404564 18.05104 1.0662440
## Test set     -6.253134 13.18546  8.36339 -7.741658 10.48581 0.4267036
##                    ACF1 Theil's U
## Training set -0.2056187        NA
## Test set     -0.5186064 0.9042278

PART 11: Efficiency - Run time with benchmark

(ON.time<-benchmark(arima_ON.m,arima_ON.5.m,arima_ON.6.m,tsr_ON,tsr_ON.5,tsr_ON.6,
                   columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
##           test elapsed replications
## 1   arima_ON.m    0.00         1000
## 2 arima_ON.5.m    0.00         1000
## 3 arima_ON.6.m    0.00         1000
## 4       tsr_ON    0.00         1000
## 6     tsr_ON.6    0.00         1000
## 5     tsr_ON.5    0.01         1000
(QC.time<-benchmark(arima_QC.m,arima_QC.5.m,arima_QC.6.m,tsr_QC,tsr_QC.5,tsr_QC.6,
                    columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
##           test elapsed replications
## 1   arima_QC.m    0.00         1000
## 2 arima_QC.5.m    0.00         1000
## 4       tsr_QC    0.00         1000
## 5     tsr_QC.5    0.00         1000
## 6     tsr_QC.6    0.00         1000
## 3 arima_QC.6.m    0.01         1000
(BC.time<-benchmark(arima_BC.6.m, arima_BC.5.m, arima_BC.m, tsr_BC, tsr_BC.5,tsr_BC.6,
          columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
##           test elapsed replications
## 1 arima_BC.6.m    0.00         1000
## 2 arima_BC.5.m    0.00         1000
## 4       tsr_BC    0.00         1000
## 5     tsr_BC.5    0.00         1000
## 6     tsr_BC.6    0.00         1000
## 3   arima_BC.m    0.01         1000
(mid.time<-benchmark(arima_mid.m,arima_mid.5.m,arima_mid.6.m,tsr_mid,tsr_mid.5,tsr_mid.6,
                     columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
##            test elapsed replications
## 1   arima_mid.m       0         1000
## 2 arima_mid.5.m       0         1000
## 3 arima_mid.6.m       0         1000
## 4       tsr_mid       0         1000
## 5     tsr_mid.5       0         1000
## 6     tsr_mid.6       0         1000
(mar.time<-benchmark(arima_mar.m,arima_mar.5.m,arima_mar.6.m,tsr_mar,tsr_mar.5,tsr_mar.6,
                     columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
##            test elapsed replications
## 1   arima_mar.m       0         1000
## 2 arima_mar.5.m       0         1000
## 3 arima_mar.6.m       0         1000
## 4       tsr_mar       0         1000
## 5     tsr_mar.5       0         1000
## 6     tsr_mar.6       0         1000
(terr.time<-benchmark(arima_terr.m,arima_terr.5.m,arima_terr.6.m,tsr_terr,tsr_terr.5,tsr_terr.6,
                      columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
##             test elapsed replications
## 1   arima_terr.m       0         1000
## 2 arima_terr.5.m       0         1000
## 3 arima_terr.6.m       0         1000
## 4       tsr_terr       0         1000
## 5     tsr_terr.5       0         1000
## 6     tsr_terr.6       0         1000

Part 12: Stability of ARIMA functions Inverse roots